Concordance
Find the ten most commonly used words in a text file.
from re import sub
def concordance(text):
    """Count how often each word occurs in *text*.

    The text is split on whitespace. Each token is lower-cased and every
    non-word character (anything other than letters, digits and the
    underscore) is removed, so ``"Heaven,"`` and ``"heaven"`` are counted
    as the same word. Tokens that contain no word characters at all
    (for example ``"--"``) are ignored rather than counted as an empty
    word.

    Args:
        text: The string to analyse.

    Returns:
        A dict mapping each normalised word to its number of occurrences.
        Keys appear in order of first occurrence.
    """
    freq = {}
    for token in text.split():
        # Raw string: '\w' in a plain literal is an invalid escape sequence.
        word = sub(r'[^\w]', '', token.lower())
        if not word:
            # Pure punctuation collapses to '' - not a word, so skip it.
            continue
        freq[word] = freq.get(word, 0) + 1
    return freq
# Path of the text to analyse.
txt_file_name = 'paradise-lost.txt'

# Load the whole file into a single string.
with open(txt_file_name) as src:
    text = src.read()

# Word -> occurrence count for the full text.
freq = concordance(text)

# The ten most frequent words, highest count first. This is a bare
# expression so that a notebook displays it as the cell result.
sorted(freq.items(), key=lambda pair: pair[1], reverse=True)[:10]
Output:
[('and', 3483),
('the', 3162),
('to', 2326),
('of', 2186),
('in', 1430),
('with', 1208),
('his', 1181),
('or', 795),
('that', 720),
('all', 712)]